************************************************
***Title: heterogeneity_by_weather_association.do
***Creators: Joelle Abramowitz, Shooshan Danagoulian, and Owen Fleming*
***Notes: This file produces the main estimates of the effect of pollen exposure on suicide, stratified by the strength of the association between weather and pollen in each county (measured by the R-squared from a county-level regression of pollen on the weather controls).

*For questions, contact
*Owen Fleming
*hg3490@wayne.edu
************************************************


**********SETUP
* Purpose: for every county, regress pollen on the weather controls and record
* the R-squared; then attach that county-level R-squared (and its median across
* counties) to every observation so the sample can be split on it below.
use data/for_analysis, clear

* $weather is not defined in this file. If it is empty, every first-stage
* regression would be constant-only and the split below would be meaningless,
* so stop early with a clear message.
if `"$weather"' == "" {
	display as error "global macro weather (the list of weather controls) must be defined before running this file"
	exit 198
}

* Start from an empty eststo list so that only models stored by this file can
* end up in the exported table.
eststo clear

* levelsof, local() copies the list of county ids straight into a macro instead
* of evaluating r(levels) as a string expression. The global is still defined
* in case other code refers to $county_levels.
levelsof county, local(county_levels)
global county_levels `county_levels'

* Collect one (county, r2) row per county in a temporary dataset.
tempname r2_post
tempfile r2_file
postfile `r2_post' double(county r2) using `"`r2_file'"', replace

foreach c of local county_levels {
	* First stage: deliberately NOT wrapped in eststo, so these per-county
	* regressions are not stored alongside the main models and cannot leak
	* into the estout table.
	regress pollen $weather if county == `c'

	* Post the loop value and e(r2) directly rather than via scalars named
	* county and r2, which would share names with the dataset variable county.
	post `r2_post' (`c') (e(r2))
}
postclose `r2_post'

* Drop counties whose R-squared is missing and add the cross-county median.
preserve
use `"`r2_file'"', clear
drop if missing(r2)
egen median_r2 = median(r2)
save `"`r2_file'"', replace
restore

* Attach the county-level R-squared to the analysis data; observations from
* counties without a usable R-squared are dropped.
merge m:1 county using `"`r2_file'"'
drop if missing(r2)


**********PRODUCE ESTIMATES
* Four ppmlhdfe models of count on the location-season pollen quartile
* indicators plus the weather controls, one for each side of each R-squared
* cutoff. Models are stored, in this order, as:
*   main_s3_less_75   (r2 <  0.075)    main_s3_greater_75   (r2 >= 0.075)
*   main_s3_less_10   (r2 <  0.1)      main_s3_greater_10   (r2 >= 0.1)
local cutoff_75 0.075
local cutoff_10 0.1
local op_less "<"
local op_greater ">="

foreach tag in 75 10 {
	foreach side in less greater {
		* Treatment: location-season quartiles, sample restricted by r2 relative to the cutoff
		eststo main_s3_`side'_`tag': ppmlhdfe count pollen_q2_ls pollen_q3_ls pollen_q4_ls $weather if r2 `op_`side'' `cutoff_`tag'', absorb(county year_month month_day) cluster(county) tolerance(1e-06)

		* Extra statistics and row labels reported by estout's stats() option
		estadd ysumm
		estadd scalar counties = e(N_clust)
		estadd local Controls "Weather"
		estadd local FE "County, Year x Month, Month x Day"
	}
}


**********EXPORT
* Write the four stratified models to a single table. The models are listed
* explicitly: without a namelist estout tabulates every estimation set stored
* by eststo, which would also include the per-county first-stage regressions
* stored during setup. keep() restricts the rows to the pollen quartile
* coefficients; eststo clear then releases all stored estimates.
estout main_s3_less_75 main_s3_greater_75 main_s3_less_10 main_s3_greater_10 using results/heterogeneity_by_weather_association.xls, cells(b(star label(Coef.) fmt(4)) se(par(`"="("'`")""') label(Std. Err.) fmt(4))) stats(ymean N counties Controls FE) starlevels(* 0.1 ** 0.05 *** 0.01) keep(pollen_q2_ls pollen_q3_ls pollen_q4_ls) legend label replace 
eststo clear


